* Settings
version 16
do "$SSDIMed/scripts/_auxiliary/_project_settings.do"


* -------------------------------------------
* Data source: SSA DAF PUF
* Sample: people joining Medicare at age 18-62 in 1993-2017 (analogous to the Medicare sample)
* 
* An observation is an individual beneficiary. For each bene, I measure their
* Age as of "SSDI DATE OF FILING" (pool ages <= 18)
* Age as of "SSDI DATE OF ENTITLEMENT START" (pool ages <= 18)

* Read the cleaned SSA DAF analysis sample, which parallels the Medicare sample:
*   - Age 20-62 at time of Medicare eligibility
*   - Gained Medicare eligibility in sample period 1993-2017
use "$SSDIMed/data/proc/ssa/DAF18/daf18_puf_ann_dmg_sample.dta", clear

* Calendar year of Medicare coverage start (covstart_month is treated as a monthly date)
generate covstart_year = year(dofm(covstart_month))

* Sample-definition checks; a missing value fails either check
assert (covstart_year >= 1993) & (covstart_year <= 2017)
* NOTE(review): the description above says ages 20-62, but this check accepts ages up to 64 - confirm the intended range
assert (age_year_covstart_ssdi >= 20) & (age_year_covstart_ssdi <= 64)
* The restriction to ages 20-62 is left disabled:
*keep if inrange(age_year_covstart_ssdi, 20, 62)

* Age at Date Of Initial Entitlement (DOIE), in years (difference in months / 12)
cap drop age_doie_daf
gen age_doie_daf = (mofd(bdoe_start_puf1) - mofd(dobbest_puf))/12
* Whole years of age, censored to the range 18-62.
* max() and min() skip missing arguments, so an unguarded min(max(., 18), 62) would
* silently code a missing age as 18. The if-qualifier keeps missing ages missing.
gen age_doie_daf_cens = min(max(floor(age_doie_daf), 18), 62) if !missing(age_doie_daf)

* Age at Date of Filing (DOF), same construction and same missing-value guard
gen age_dof_daf = (mofd(bdof_puf1) - mofd(dobbest_puf))/12
gen age_dof_daf_cens = min(max(floor(age_dof_daf), 18), 62) if !missing(age_dof_daf)

* Distribution of censored age at DOIE and at DOF
* (the tabulation of age at Medicare start, which shifted by 2 years gives an "imputed" DOIE, is left disabled)
tab age_doie_daf_cens
* tab age_year_covstart_ssdi
tab age_dof_daf_cens

*** QC: how well does the "imputed" DOIE age (age at Medicare start minus 2) match the DOIE age?
* Each check prints the matching share in percent, rounded to 0.1, and compares it
* to the expected value with assert_equal.

* (a) Everyone entering Medicare at ages 22-62: share matching exactly
local in_scope inrange(age_year_covstart_ssdi, 22, 62)
count if (`in_scope') & (age_year_covstart_ssdi - 2 == age_doie_daf_cens)
local n_hit = r(N)
count if `in_scope'
local pct_hit = round(`n_hit'/r(N) * 100, 0.1)
di `pct_hit'
assert_equal, num(`pct_hit', 95.8)

* (b) Same group: share matching within a tolerance
*     The ages compared are whole years, so the tolerance is in years
local tol_years 4
local in_scope inrange(age_year_covstart_ssdi, 22, 62)
count if (`in_scope') & inrange(age_doie_daf_cens - (age_year_covstart_ssdi - 2), -`tol_years', `tol_years')
local n_hit = r(N)
count if `in_scope'
local pct_hit = round(`n_hit'/r(N) * 100, 0.1)
di `pct_hit'
assert_equal, num(`pct_hit', 99.1)

* (c) Primary claimants (bic_puf equal to "A") entering Medicare at ages 22-62: share matching exactly
local in_scope (bic_puf == "A" & inrange(age_year_covstart_ssdi, 22, 62))
count if (`in_scope') & (age_year_covstart_ssdi - 2 == age_doie_daf_cens)
local n_hit = r(N)
count if `in_scope'
local pct_hit = round(`n_hit'/r(N) * 100, 0.1)
di `pct_hit'
assert_equal, num(`pct_hit', 96.5)

* Convert from single year of age to age bins, mirroring ASR age bins/range (ages 20-60)
* Calculate annual average frequency per year of age
*
* For each age variable, the loop saves a tempfile named after the variable, holding
* one row per age_mid and the average annual count in a variable named after the age variable.
* The data in memory are restored at the end of each pass.
foreach age_var in age_doie_daf_cens age_dof_daf_cens {
  * local age_var age_doie_daf_cens
  preserve

  * Frequency-weight each obs by 10, to account for 10% sampling in SSA DAF PUF
  gen freq = 10

  * Total per age and coverage-start year, then the mean across years for each age.
  * Age-year cells with no sample members produce no row, so the mean is taken
  * over the years in which the age is observed.
  gcollapse (sum) freq, by(`age_var' covstart_year)
  gcollapse (mean) freq, by(`age_var')

  * Ages outside 20-60 (and missing ages) are dropped here
  keep if inrange(`age_var', 20, 60)

  * Define age bins: 5-year bins through age 44, single years of age from 45 on.
  * No bin is assigned for ages 18-19 because those ages were dropped above.
  gen age_mid = `age_var'
  replace age_mid = 22   if inrange(`age_var', 20, 24)
  replace age_mid = 27   if inrange(`age_var', 25, 29)
  replace age_mid = 32   if inrange(`age_var', 30, 34)
  replace age_mid = 37   if inrange(`age_var', 35, 39)
  replace age_mid = 42   if inrange(`age_var', 40, 44)

  * Average across the ages in each bin: frequency per single year of age
  gcollapse (mean) freq, by(age_mid)
  rename freq `age_var'

  assert age_mid <=60
  list, sep(0)

  * The tempfile macro carries the name of the age variable and is used in the merge further below
  tempfile `age_var'
  save ``age_var''

  restore
}


* -------------------------------------------
* ASR custom application results
* https://www.ssa.gov/policy/docs/statcomps/di_asr/2020/sect04.pdf

use "$SSDIMed/data/raw/ASR-custom/final/SSDI_ASR_60.dta", clear
assert table == 60
keep if group == "All disabled beneficiaries"

* QC: where a 5-year bin and its single-year categories both exist (45-49 through 60-64),
* the 5-year total must equal the sum over the five single years of age
tab age
quietly {
  foreach bin_lo of numlist 45(5)60 {
    local bin_hi = `bin_lo' + 4
    noisily di `"btw_eq_`bin_lo'_`bin_hi'"'

    * Total recorded for the 5-year category
    sum total if age == "btw_eq_`bin_lo'_`bin_hi'"
    local bin_total = r(sum)

    * Argument list for inlist(): the variable followed by the five single-year labels
    local members age
    forvalues yr = `bin_lo'/`bin_hi' {
      local members `members', "eq_`yr'"
    }
    noisily di `"inlist(`members')"'

    * Total over the single-year categories
    sum total if inlist(`members')
    assert `bin_total' == r(sum)
  }
}

* Retain the 5-year bins through age 44 and single years of age from 45 on; restrict to ages 20-60.
drop if inlist(age, "btw_eq_45_49",  "btw_eq_50_54",  "btw_eq_55_59",  "btw_eq_60_64")
drop if inlist(age, "lt_20", "eq_61", "eq_62", "eq_63", "eq_64", "gt_eq_65")

* Category type, read off the prefix of the age label
local is_single (strpos(age, "eq_") == 1)
local is_range  (strpos(age, "btw_eq_") == 1)
local is_below  (strpos(age, "lt_") == 1)

* Bounds of each age category
*   upper bound: the last two characters of the label
*   lower bound: single year -> same as upper; 5-year bin -> upper minus 4; "less than" -> 18
generate age_hi = real(substr(age, -2, 2))
generate age_lo = cond(`is_below', 18, cond(`is_range', age_hi - 4, cond(`is_single', age_hi, .)))
* A "less than 20" category tops out at 19
replace age_hi = 19 if `is_below' & age_hi == 20
assert !missing(age_lo, age_hi)

* Midpoint and width of each age category
generate age_mid = (age_lo + age_hi)/2
generate age_bin_width = age_hi - age_lo + 1
label variable age_mid "Midpoint of age bin range"
label variable age_bin_width "Number of ages in age bin"

* QC: an observation is an age category in a year, with 10 rows per age category,
* and the bin width is constant within an age category
gisid age_mid year
bysort age_mid: assert _N == 10
bysort age_mid: assert age_bin_width ==  age_bin_width[1]

* Spread each category total evenly over the ages it covers
generate age_dof_asr = total / age_bin_width
label variable age_dof_asr "Average applications per year of age in bin"

* One row per age category: mean across rows, carrying along the bin descriptors
gcollapse (mean) age_dof_asr (first) age_bin_width age_lo age_hi, by(age_mid)
list, separator(0)

* Stash the ASR series for the merge with the DAF series
tempfile age_dof_asr
save `age_dof_asr'


* -------------------------------------------
* Combine frequency counts by age at date of filing / initial eligibility based on ASR (all applicants) and DAF PUF (awardees)

* Start from the ASR series and attach the two DAF series.
* Every age category must be present on both sides of each merge.
use `age_dof_asr', clear
foreach src in age_dof_daf_cens age_doie_daf_cens {
  merge 1:1 age_mid using ``src'', assert(match) nogenerate noreport
}

* Express frequency counts in thousands
foreach v of varlist age_dof_asr age_dof_daf_cens age_doie_daf_cens {
  replace `v' = `v' / 1000
}

order age_mid age_lo age_hi age_bin_width age_dof_asr age_dof_daf_cens age_doie_daf_cens
list, separator(0) abbreviate(20)


* ---------------------------------------------------------------------------------------
* Graph settings
* ---------------------------------------------------------------------------------------

* Apply the project default graph style
*   Arguments passed: height width (default 3.5in 6.5in)
do "$SSDIMed/scripts/_auxiliary/_project_grstyle.do" 3.1in 6.5in

* RGB palette
local red "179 0 0"
local green "75 115 47"
local blue "48 84 150"
local bluegray "126 153 180"
local brown "140 045 004"
local orange "237 125 49"
local tan "210 180 140"

* Plot styles p1-p4, one per series (entries are listed in p1..p4 order):
*   p1  ASR: number of applications
*   p2  DAF: applications | award
*   p3  DAF: entrants (awards)
*   p4  DAF: applications | award  -  BACKGROUND (color of p2 at 30% opacity)
local series_colors   blue     orange    brown orange
local series_patterns longdash shortdash solid solid
local series_widths   1.3pt    1.3pt     1.8pt 1.3pt
forvalues p = 1/4 {
  local cname   : word `p' of `series_colors'
  local pattern : word `p' of `series_patterns'
  local width   : word `p' of `series_widths'

  * Only the background series gets an opacity suffix
  local alpha
  if `p' == 4 local alpha "%30"

  * Resolve the palette name to its RGB triple
  local color`p' ``cname''

  grstyle set color "`color`p''`alpha'": p`p'
  grstyle set symbol i: p`p'
  grstyle set symbolsize 2.2pt: p`p'
  grstyle set lpattern `pattern': p`p'
  grstyle set linewidth `width': p`p'
}

* p5: arrows and lines
local p 5
grstyle set color gs8: p`p'
grstyle set lpattern solid: p`p'
grstyle set linewidth vvthin: p`p'


* -------------------------------------------------------------------------------------------------
* Figure: Annual number of DI applicants and entrants, by age
*   Plots the three series from the combined ASR/DAF table against age_mid and
*   exports the graph to results/figures/apps_awards_age_count.pdf
* -------------------------------------------------------------------------------------------------


* Toggle: change the condition to 0 to skip building and exporting the figure
if 1 {

* Make min and max labels align on both y axes
* Stata tip 93: Handling multiple y axes on twoway graphs: https://journals.sagepub.com/doi/pdf/10.1177/1536867X1101000411

* How many major ticks?
* How much should range extend beyond ticks (as fraction of tick step size)
local y_ticks 10
local y_margin_lo 0
local y_margin_hi 0.6

* y1 tick lo, step size, format, units
local y1_tick_lo 0
local y1_step 10
local y1_fmt "%9.2g"
local y1_unit "thousand"

* Construct y labels and scales from parameters above
* Only one y axis is used in this figure, so the loop runs for axis 1 only.
* With the parameters above: highest tick = 0 + 10 * (10 - 1) = 90, range = 0 to 96.
* NOTE(review): the yalt macro referenced inside yscale() is not defined anywhere in this
* section, so it expands to nothing here - confirm it is not expected from elsewhere.
forvalues y = 1/1 {
  local y`y'_tick_hi  = `y`y'_tick_lo' + `y`y'_step' * (`y_ticks' - 1)
  local y`y'_range_lo = `y`y'_tick_lo' - `y`y'_step' * `y_margin_lo'
  local y`y'_range_hi = `y`y'_tick_hi' + `y`y'_step' * `y_margin_hi'
  di "y`y'_tick_hi:  `y`y'_tick_hi'"
  di "y`y'_range_lo: `y`y'_range_lo'"
  di "y`y'_range_hi: `y`y'_range_hi'"
  
  local y`y'_label ylabel(`y`y'_tick_lo'(`y`y'_step')`y`y'_tick_hi', axis(`y') format("`y`y'_fmt'") noticks nolabels labgap(1pt)) 
  local y`y'_scale yscale(`yalt' range(`y`y'_range_lo' `y`y'_range_hi') axis(`y') noline) 
  di `"y`y'_label:  `y`y'_label'"'
  di `"y`y'_scale:  `y`y'_scale'"'
}

* x-axis settings
* The 5-year bins are labeled at their midpoints (22, 27, ..., 42).
* The numlist 45(5)62 places the remaining labels at 45, 50, 55 and 60.
local xlo 20
local xhi 61
local xscale xscale(range(`xlo' `xhi'))
local x_axis xlabel(22 "20–24" 27 "25–29" 32 "30–34" 37 "35–39" 42 "40–44" 45(5)62) `xscale' xtitle("Age", margin(t=+.5 b=+.1))

* style and color for y-axis title/labels
local y1_title_style sf
local y1_title_color black
local y1_label_style it
local y1_label_color gs8

* y-axis labels placed above grid rule (build manually)
* The built-in axis labels are suppressed (nolabels above); one text() annotation per tick
* is accumulated instead, anchored at the left edge of the x range.
* The top tick also gets the unit appended and a translucent white box drawn behind it.
* NOTE(review): the white box offset uses y1_step directly rather than the step of the
* axis being looped over; the two coincide because only axis 1 is built.
local y 1
local y`y'_la
foreach tick of numlist  `y`y'_tick_lo'(`y`y'_step')`y`y'_tick_hi' {
  if `tick' == `y`y'_tick_hi' {
    * white textbox to go behind label
    local y`y'_la `y`y'_la' text(`=`tick' + `y1_step'/100' `xlo' " ", yaxis(`y') place(ne) just(left) box lwidth(none) color(`y1_label_color') width(20) height(3) bcolor(white%70))
    *label
    local y`y'_la `y`y'_la' text(`tick' `xlo' "{`y`y'_label_style':`=string(`tick', "`y`y'_fmt'")' `y1_unit'}", yaxis(`y') place(ne) just(left) box lwidth(none) color(`y1_label_color') bcolor(none))
  }
  else {
    local y`y'_la `y`y'_la' text(`tick' `xlo' "{`y`y'_label_style':`=string(`tick', "`y`y'_fmt'")'}", yaxis(`y') place(ne) just(left) box lwidth(none) color(`y1_label_color') bcolor(none))
  }
}

* y-axis titles, horizontal orientation (build manually)
* The title is a text() annotation placed at the top of the y range (lowest tick plus
* step times ticks minus 1 plus upper margin); the regular ytitle is replaced by a blank.
* The option string is assembled over three local statements.
local y1title text(`=`y1_tick_lo' + `y1_step'*(`y_ticks' - 1 + `y_margin_hi')' `xlo' "{`y1_title_style':Annual number of DI applicants and entrants per year, by age}"
local y1title `y1title', yaxis(1) place(ne) just(left) box lwidth(none) color(`y1_title_color') bcolor(white) margin(b=+.5))
local y1title `y1title' ytitle(" ", axis(1) margin(l=-8))

* y-axis settings
local y_axis1 `y1_label' `y1_la' `y1_scale' `y1title'

* Plot lines
* Series are drawn in the order listed in the lines macro: the translucent background copy
* of the DAF filing-age series first, then DAF entry age, DAF filing age, and ASR applications.
local line_age_dof_asr         (connected age_dof_asr age_mid, yaxis(1) pstyle(p1))
local line_age_dof_daf_cens    (connected age_dof_daf_cens age_mid, yaxis(1) pstyle(p2))
local line_age_doie_daf_cens   (connected age_doie_daf_cens age_mid, yaxis(1) pstyle(p3))
local line_age_dof_daf_cens_bg (connected age_dof_daf_cens age_mid, yaxis(1) pstyle(p4))
local lines `line_age_dof_daf_cens_bg' `line_age_doie_daf_cens' `line_age_dof_daf_cens' `line_age_dof_asr'

* Series labels: each label is a text() annotation plus a pci connector line.
* Coordinates are hand-placed; text() takes y then x, and pci takes y1 x1 y2 x2.
* The age macro is the x position each label is anchored around.

* Label line 1
local age 45
local text_1 text(65 `=`age'-1.5' "Applicants, by age at application"
local text_1 `text_1', yaxis(1) place(w) just(right) box lwidth(none) width(50) bcolor(white%90) size(*0.95))
local pci_1 (pci 64 `=`age'-1.4' 57 `=`age'-0.5', pstyle(p5))

* Label line 2
local age 49
local text_2 text(25 `=`age'-1.2' "Entrants, by age at application"
local text_2 `text_2', yaxis(1) place(w) just(right) box lwidth(none) width(40) bcolor(white%90) size(*0.95))
local pci_2 (pci 24 `=`age'-1.0' 20 `=`age'-0.5', pstyle(p5))

* Label line 3
local age 55
local text_3 text(44 `=`age'-1.3' "Entrants, by age at entry"
local text_3 `text_3', yaxis(1) place(w) just(right) box lwidth(none) width(40) bcolor(white%95) size(*0.95))
local pci_3 (pci 43 `=`age'-1.2' 33 `=`age'-0.3', pstyle(p5))

* Eligibility changes
* Vertical reference lines at ages 50 and 55, with a centered caption between them
* and two short connectors from the caption to the lines.
local text_grid text(8.5 52.5 "Eligibility" "relaxes", place(c) just(center) box lwidth(none) bcolor(white%90))
local xlines_grid xline(50) xline(55)
local pci_grid (pci 8 50.6 7.9 50, pstyle(p5)) (pci 8 54.4 7.9 55, pstyle(p5))

local legend legend(off)
local region graphregion(margin(t=5 b=-1))

* Make graph
twoway `lines' `pci_grid' `pci_1' `pci_2' `pci_3', `x_axis' `y_axis1' `legend' `region' `text_grid' `xlines_grid' yline(0, lpattern(solid) lwidth(thin) lcolor(gs8)) `text_1' `text_2' `text_3'

* Export graph
* capture ignores any error from mkdir, such as the directory already existing.
* mkdir creates only the last path component, so the results directory is presumed to exist - TODO confirm.
cap mkdir "$SSDIMed/results/figures"
graph export "$SSDIMed/results/figures/apps_awards_age_count.pdf", as(pdf) fontface("Source Sans Pro") fontdir("$SSDIMed/data/raw/fonts") replace

}




** EOF
